def getTranslationMessageData(self, translationmessage):
    """Convert a TranslationMessage into a TranslationMessageData.

    The resulting object is the representation used during import.

    :param translationmessage: The message to convert; its `potmsgset`
        supplies the context and msgids, and its `translations` supply
        one translation per plural form.
    :return: A new `TranslationMessageData` carrying the same context,
        msgids and translations.
    """
    template_message = translationmessage.potmsgset
    data = TranslationMessageData()

    # Identification comes from the template side of the message.
    data.context = template_message.context
    data.msgid_singular = template_message.singular_text
    data.msgid_plural = template_message.plural_text

    # The position of each translation is its plural form number.
    for form_index, text in enumerate(translationmessage.translations):
        data.addTranslation(form_index, text)

    return data
def getTranslationMessageData(self, translationmessage):
    """Return the import-time data object for `translationmessage`.

    Copies the context and the singular/plural msgids from the message's
    `potmsgset`, then registers every entry of the message's
    `translations` under its plural form index.

    :param translationmessage: The TranslationMessage to convert.
    :return: A freshly built `TranslationMessageData`.
    """
    msgset = translationmessage.potmsgset
    result = TranslationMessageData()
    result.context = msgset.context
    result.msgid_singular = msgset.singular_text
    result.msgid_plural = msgset.plural_text

    all_forms = translationmessage.translations
    for position, translated_text in enumerate(all_forms):
        result.addTranslation(position, translated_text)
    return result
def new_general_entity(self, name, value):
    """See `xmldtd.WFCDTD`.

    Record the entity `name` with text `value` as a new message in
    `self.messages`.  Nothing is recorded while `self.started` is false.
    """
    if not self.started:
        return

    # CarlosPerelloMarin 20070326: xmldtd parser does an inline
    # parsing which means that the content is all in a single line so we
    # don't have a way to show the line number with the source reference.
    source_reference = "%s(%s)" % (self.filename, name)

    entry = TranslationMessageData()
    entry.msgid_singular = name
    entry.file_references_list = [source_reference]
    entry.addTranslation(TranslationConstants.SINGULAR_FORM, value)
    entry.singular_text = value
    entry.context = self.chrome_path
    entry.source_comment = self.last_comment
    self.messages.append(entry)

    self.started += 1
    # The pending comment now belongs to this entity; don't reuse it.
    self.last_comment = None
def new_general_entity(self, name, value):
    """See `xmldtd.WFCDTD`.

    Turn an entity declaration into a message appended to
    `self.messages`: `name` becomes the msgid and `value` both the
    singular text and the singular-form translation.  The call is a
    no-op while `self.started` is false.
    """
    if not self.started:
        return

    new_message = TranslationMessageData()
    new_message.msgid_singular = name
    # CarlosPerelloMarin 20070326: xmldtd parser does an inline
    # parsing which means that the content is all in a single line so we
    # don't have a way to show the line number with the source reference.
    new_message.file_references_list = [
        "%s(%s)" % (self.filename, name),
        ]
    new_message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
    new_message.singular_text = value
    new_message.context = self.chrome_path
    # Attach whatever comment was collected before this entity.
    new_message.source_comment = self.last_comment
    self.messages.append(new_message)

    self.started += 1
    self.last_comment = None
def _fetchDBRows(self, simulate_timeout=False):
    """Merge translations fetched from the database into
    `self.current_messages`.

    Selects the messages for `self.pofile`'s template and language whose
    flag column (named by `self._getFlagName()`) is true, and stores one
    `TranslationMessageData` per (msgid, msgid_plural, context) key.  A
    plural form that an existing entry already has a translation for is
    left alone.

    The current transaction is committed before querying.  If the query
    hits the statement timeout, the transaction is aborted and the
    method returns without adding anything.

    :param simulate_timeout: Used in tests: run `pg_sleep(2)` under a
        1ms timeout instead of the real query.
    """
    plural_forms = xrange(TranslationConstants.MAX_PLURAL_FORMS)

    join_template = (
        "LEFT OUTER JOIN POTranslation AS pt%d "
        "ON pt%d.id = TranslationMessage.msgstr%d")
    msgstr_joins = [
        join_template % (form, form, form) for form in plural_forms]

    column_template = "pt%d.translation AS translation%d"
    translations = [column_template % (form, form) for form in plural_forms]

    substitutions = {
        'translation_columns': ', '.join(translations),
        'translation_joins': '\n'.join(msgstr_joins),
        'language': quote(self.pofile.language),
        'potemplate': quote(self.pofile.potemplate),
        'flag': self._getFlagName(),
    }

    sql = (
        " SELECT"
        " POMsgId.msgid AS msgid,"
        " POMsgID_Plural.msgid AS msgid_plural,"
        " context,"
        " date_reviewed,"
        " %(translation_columns)s"
        " FROM POTMsgSet"
        " JOIN TranslationTemplateItem ON"
        " TranslationTemplateItem.potmsgset = POTMsgSet.id AND"
        " TranslationTemplateItem.potemplate = %(potemplate)s"
        " JOIN TranslationMessage ON"
        " POTMsgSet.id=TranslationMessage.potmsgset AND ("
        " TranslationMessage.potemplate = %(potemplate)s OR"
        " TranslationMessage.potemplate IS NULL) AND"
        " TranslationMessage.language = %(language)s"
        " %(translation_joins)s"
        " JOIN POMsgID ON"
        " POMsgID.id = POTMsgSet.msgid_singular"
        " LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON"
        " POMsgID_Plural.id = POTMsgSet.msgid_plural"
        " WHERE"
        " %(flag)s IS TRUE"
        " ORDER BY"
        " TranslationTemplateItem.sequence,"
        " TranslationMessage.potemplate NULLS LAST"
        " "
    ) % substitutions

    cur = cursor()
    try:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
        # timeout to work around bug 408718, but the query is
        # simpler now.  See if we still need this.

        # We have to commit what we've got so far or we'll lose
        # it when we hit TimeoutError.
        transaction.commit()

        if not simulate_timeout:
            # Presumably the configured value is in seconds and the
            # setting wants milliseconds -- verify against the config.
            timeout = 1000 * int(config.poimport.statement_timeout)
            query = sql
        else:
            # This is used in tests.
            timeout = '1ms'
            query = "SELECT pg_sleep(2)"

        cur.execute("SET statement_timeout to %s" % quote(timeout))
        cur.execute(query)
    except TimeoutError:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
        # know whether it still happens.
        transaction.abort()
        return

    rows = cur.fetchall()

    assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
        "Change this code to support %d plural forms"
        % TranslationConstants.MAX_PLURAL_FORMS)

    for row in rows:
        # date_reviewed is selected but not used here.
        msgid, msgid_plural, context, _date_reviewed = row[:4]
        # The last part of the row is msgstr0 .. msgstr5. Store them
        # in a dict indexed by the number of the plural form.
        msgstrs = dict(enumerate(row[4:]))

        key = (msgid, msgid_plural, context)
        if key not in self.current_messages:
            fresh = TranslationMessageData()
            self.current_messages[key] = fresh
            fresh.context = context
            fresh.msgid_singular = msgid
            fresh.msgid_plural = msgid_plural
        message = self.current_messages[key]

        for plural in xrange(TranslationConstants.MAX_PLURAL_FORMS):
            msgstr = msgstrs.get(plural, None)
            if msgstr is None:
                continue
            # Only fill forms that are missing or still empty.
            if (len(message.translations) <= plural
                or message.translations[plural] is None):
                message.addTranslation(plural, msgstr)
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    Each `key = value` line becomes one message.  A comment is attached
    to a message only when it sits on the message's own line or on the
    line directly above it; comments containing `self.license_block_text`
    are dropped.

    :param content: The file content as a UTF-8 encoded byte string.
    :raises TranslationFormatInvalidInputError: if `content` is not
        valid UTF-8 or a line cannot be unicode-unescaped.
    :raises TranslationFormatSyntaxError: if the text before an '=' is
        rejected by `valid_property_msgid`.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        # Use the `message` keyword, as the other errors raised here do,
        # so the text cannot land in a different positional argument.
        raise TranslationFormatInvalidInputError(
            message='Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Count the line before decoding it, so that a decoding error
        # reports the same 1-based number as syntax errors and file
        # references do.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # Drop the whole marker: one character for '#', two for
                # '//'.
                marker_length = 2 if line.startswith(u'//') else 1
                line = line[marker_length:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False

                    # Comments must end always with a new line.  There
                    # is nothing to terminate when the comment was
                    # discarded or had no text at all.
                    if last_comment is not None:
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        # First character stored from a new line.
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                    # Remember which line the stored text came from, so
                    # the following characters of this line are appended
                    # without another line break.
                    last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permisive with what you import'
                # policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)
                ]
            value = translation.strip()
            message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def parse(self, content):
    """Parse given content as a property file.

    Once the parse is done, self.messages has a list of the available
    `ITranslationMessageData`s.

    Every `key = value` line yields one message.  A comment is kept as
    the message's source comment only if it is on the same line as the
    message or on the line immediately before it; a comment that
    contains `self.license_block_text` is discarded.

    :param content: The file content as a UTF-8 encoded byte string.
    :raises TranslationFormatInvalidInputError: if `content` is not
        valid UTF-8 or a line cannot be unicode-unescaped.
    :raises TranslationFormatSyntaxError: if the text before an '=' is
        rejected by `valid_property_msgid`.
    """
    # .properties files are supposed to be unicode-escaped, but we know
    # that there are some .xpi language packs that instead, use UTF-8.
    # That's against the specification, but Mozilla applications accept
    # it anyway, so we try to support it too.
    # To do this support, we read the text as being in UTF-8
    # because unicode-escaped looks like ASCII files.
    try:
        content = content.decode('utf-8')
    except UnicodeDecodeError:
        # Instantiate the exception explicitly (the `raise Class, args`
        # statement form is not valid on Python 3) and pass the text
        # through `message=`, matching the other errors raised here.
        raise TranslationFormatInvalidInputError(
            message='Content is not valid unicode-escaped text')

    line_num = 0
    is_multi_line_comment = False
    last_comment = None
    last_comment_line_num = 0
    ignore_comment = False
    is_message = False
    translation = u''
    for line in content.splitlines():
        # Advance the counter first: a decoding error must report the
        # same 1-based line number that syntax errors and file
        # references use.
        line_num += 1

        # Now, to "normalize" all to the same encoding, we encode to
        # unicode-escape first, and then decode it to unicode
        # XXX: Danilo 2006-08-01: we _might_ get performance
        # improvements if we reimplement this to work directly,
        # though, it will be hard to beat C-based de/encoder.
        # This call unescapes everything so we don't need to care about
        # quotes escaping.
        try:
            string = line.encode('raw-unicode_escape')
            line = string.decode('unicode_escape')
        except UnicodeDecodeError as exception:
            raise TranslationFormatInvalidInputError(
                filename=self.filename, line_number=line_num,
                message=str(exception))

        if not is_multi_line_comment:
            # Remove any white space before the useful data, like
            # ' # foo'.
            line = line.lstrip()
            if len(line) == 0:
                # It's an empty line. Reset any previous comment we have.
                last_comment = None
                last_comment_line_num = 0
                ignore_comment = False
            elif line.startswith(u'#') or line.startswith(u'//'):
                # It's a whole line comment.
                ignore_comment = False
                # '//' is two characters long; skipping only one would
                # leave a stray '/' at the start of the comment text.
                if line.startswith(u'//'):
                    line = line[2:].strip()
                else:
                    line = line[1:].strip()
                if last_comment:
                    last_comment += line
                elif len(line) > 0:
                    last_comment = line

                if last_comment and not last_comment.endswith('\n'):
                    # Comments must end always with a new line.
                    last_comment += '\n'

                last_comment_line_num = line_num
                continue

        # Unescaped URLs are a common mistake: the "//" starts an
        # end-of-line comment.  To work around that, treat "://" as
        # a special case.
        just_saw_colon = False

        while line:
            if is_multi_line_comment:
                if line.startswith(u'*/'):
                    # The comment ended, we jump the closing tag and
                    # continue with the parsing.
                    line = line[2:]
                    is_multi_line_comment = False
                    last_comment_line_num = line_num
                    if ignore_comment:
                        last_comment = None
                        ignore_comment = False

                    # Comments must end always with a new line; a
                    # discarded or empty comment has no text to end.
                    if last_comment is not None:
                        last_comment += '\n'
                elif line.startswith(self.license_block_text):
                    # It's a comment with a licence notice, this
                    # comment can be ignored.
                    ignore_comment = True
                    # Jump the whole tag
                    line = line[len(self.license_block_text):]
                else:
                    # Store the character.
                    if last_comment is None:
                        last_comment = line[0]
                    elif last_comment_line_num == line_num:
                        last_comment += line[0]
                    else:
                        # The comment continues on a new line.
                        last_comment = u'%s\n%s' % (last_comment, line[0])
                    # Track the line of the last stored character so the
                    # rest of this line is appended without a break.
                    last_comment_line_num = line_num
                    # Jump the processed char.
                    line = line[1:]
                continue
            elif line.startswith(u'/*'):
                # It's a multi line comment
                is_multi_line_comment = True
                ignore_comment = False
                last_comment_line_num = line_num
                # Jump the comment starting tag
                line = line[2:]
                continue
            elif line.startswith(u'//') and not just_saw_colon:
                # End-of-line comment.
                last_comment = '%s\n' % line[2:].strip()
                last_comment_line_num = line_num
                # On to next line.
                break
            elif is_message:
                # Store the char and continue.
                head_char = line[0]
                translation += head_char
                line = line[1:]
                just_saw_colon = (head_char == ':')
                continue
            elif u'=' in line:
                # Looks like a message string.
                (key, value) = line.split('=', 1)
                # Remove leading and trailing white spaces.
                key = key.strip()

                if valid_property_msgid(key):
                    is_message = True
                    # Jump the msgid, control chars and leading white
                    # space.
                    line = value.lstrip()
                    continue
                else:
                    raise TranslationFormatSyntaxError(
                        line_number=line_num,
                        message=u"invalid msgid: '%s'" % key)
            else:
                # Got a line that is not a valid message nor a valid
                # comment. Ignore it because main en-US.xpi catalog from
                # Firefox has such line/error. We follow the 'be strict
                # with what you export, be permisive with what you import'
                # policy.
                break

        if is_message:
            # We just parsed a message, so we need to add it to the list
            # of messages.
            if ignore_comment or last_comment_line_num < line_num - 1:
                # We must ignore the comment or either the comment is not
                # the last thing before this message or is not in the same
                # line as this message.
                last_comment = None
                ignore_comment = False

            message = TranslationMessageData()
            message.msgid_singular = key
            message.context = self.chrome_path
            message.file_references_list = [
                "%s:%d(%s)" % (self.filename, line_num, key)]
            value = translation.strip()
            message.addTranslation(
                TranslationConstants.SINGULAR_FORM, value)
            message.singular_text = value
            message.source_comment = last_comment
            self.messages.append(message)

            # Reset status vars.
            last_comment = None
            last_comment_line_num = 0
            is_message = False
            translation = u''
def _fetchDBRows(self, simulate_timeout=False):
    """Load translations from the database into `self.current_messages`.

    The query covers the template and language of `self.pofile` and only
    rows whose `self._getFlagName()` column is true.  Rows are keyed by
    (msgid, msgid_plural, context); a new `TranslationMessageData` is
    created for an unseen key, and a plural form is only filled in when
    the entry does not have a translation for it yet.

    Whatever was done in the current transaction is committed first.  On
    `TimeoutError` the transaction is aborted and the method returns
    early, adding nothing.

    :param simulate_timeout: Used in tests: execute `pg_sleep(2)` with a
        1ms statement timeout rather than the real query.
    """
    max_forms = TranslationConstants.MAX_PLURAL_FORMS

    msgstr_joins = []
    translations = []
    for form in xrange(max_forms):
        msgstr_joins.append(
            "LEFT OUTER JOIN POTranslation AS pt%d "
            "ON pt%d.id = TranslationMessage.msgstr%d" % (form, form, form))
        translations.append(
            "pt%d.translation AS translation%d" % (form, form))

    substitutions = {
        'translation_columns': ', '.join(translations),
        'translation_joins': '\n'.join(msgstr_joins),
        'language': quote(self.pofile.language),
        'potemplate': quote(self.pofile.potemplate),
        'flag': self._getFlagName(),
    }

    sql_template = (
        " SELECT"
        " POMsgId.msgid AS msgid,"
        " POMsgID_Plural.msgid AS msgid_plural,"
        " context,"
        " date_reviewed,"
        " %(translation_columns)s"
        " FROM POTMsgSet"
        " JOIN TranslationTemplateItem ON"
        " TranslationTemplateItem.potmsgset = POTMsgSet.id AND"
        " TranslationTemplateItem.potemplate = %(potemplate)s"
        " JOIN TranslationMessage ON"
        " POTMsgSet.id=TranslationMessage.potmsgset AND ("
        " TranslationMessage.potemplate = %(potemplate)s OR"
        " TranslationMessage.potemplate IS NULL) AND"
        " TranslationMessage.language = %(language)s"
        " %(translation_joins)s"
        " JOIN POMsgID ON"
        " POMsgID.id = POTMsgSet.msgid_singular"
        " LEFT OUTER JOIN POMsgID AS POMsgID_Plural ON"
        " POMsgID_Plural.id = POTMsgSet.msgid_plural"
        " WHERE"
        " %(flag)s IS TRUE"
        " ORDER BY"
        " TranslationTemplateItem.sequence,"
        " TranslationMessage.potemplate NULLS LAST"
        " "
    )
    sql = sql_template % substitutions

    cur = cursor()
    try:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: We set a
        # timeout to work around bug 408718, but the query is
        # simpler now.  See if we still need this.

        # We have to commit what we've got so far or we'll lose
        # it when we hit TimeoutError.
        transaction.commit()

        if simulate_timeout:
            # This is used in tests.
            timeout, query = '1ms', "SELECT pg_sleep(2)"
        else:
            configured = int(config.poimport.statement_timeout)
            timeout, query = 1000 * configured, sql

        cur.execute("SET statement_timeout to %s" % quote(timeout))
        cur.execute(query)
    except TimeoutError:
        # XXX JeroenVermeulen 2010-11-24 bug=680802: Log this so we
        # know whether it still happens.
        transaction.abort()
        return

    rows = cur.fetchall()

    assert TranslationConstants.MAX_PLURAL_FORMS == 6, (
        "Change this code to support %d plural forms"
        % TranslationConstants.MAX_PLURAL_FORMS)

    for row in rows:
        # The fourth column, date_reviewed, is fetched but unused.
        msgid, msgid_plural, context, _unused_date = row[:4]

        # The last part of the row is msgstr0 .. msgstr5. Store them
        # in a dict indexed by the number of the plural form.
        msgstrs = dict(enumerate(row[4:]))

        key = (msgid, msgid_plural, context)
        if key in self.current_messages:
            message = self.current_messages[key]
        else:
            message = TranslationMessageData()
            self.current_messages[key] = message
            message.context = context
            message.msgid_singular = msgid
            message.msgid_plural = msgid_plural

        for plural in xrange(TranslationConstants.MAX_PLURAL_FORMS):
            msgstr = msgstrs.get(plural, None)
            if msgstr is None:
                # Nothing in the database for this form.
                continue
            form_is_missing = len(message.translations) <= plural
            if form_is_missing or message.translations[plural] is None:
                message.addTranslation(plural, msgstr)